From 99d21671d53202f929237ac761a06860328f9744 Mon Sep 17 00:00:00 2001 From: "kaf24@firebug.cl.cam.ac.uk" Date: Wed, 30 Mar 2005 18:17:26 +0000 Subject: [PATCH] bitkeeper revision 1.1236.1.164 (424aed36IaX4v3-NGAT_MnCdUsD1yQ) Split mmu_update() hypercall into mmu_update() and mmuext_op(). All MMUEXT_* ops are now done via the latter hypercall. This allows more arguments to be passed in a nicer way to mmuext operations. Linux 2.4, 2.6 and control tools all use the new interface. The BSDs will need some work, but shouldn't be too hard (and they can be moved to writable pagetables at the same time :-) ). Signed-off-by: Keir Fraser --- .../i386-xen/i386-xen/xen_machdep.c | 2 +- .../i386-xen/xen/netfront/xn_netfront.c | 2 +- .../arch/xen/kernel/traps.c | 10 +- linux-2.4.29-xen-sparse/arch/xen/mm/ioremap.c | 19 +- linux-2.4.29-xen-sparse/mm/memory.c | 2 +- .../arch/xen/i386/kernel/traps.c | 9 +- .../arch/xen/i386/mm/hypervisor.c | 84 +- .../arch/xen/i386/mm/ioremap.c | 24 +- .../drivers/xen/blkback/blkback.c | 2 +- .../drivers/xen/netback/netback.c | 61 +- .../drivers/xen/netfront/netfront.c | 3 +- .../drivers/xen/privcmd/privcmd.c | 24 +- .../drivers/xen/usbback/usbback.c | 2 +- .../include/asm-xen/asm-i386/pgtable.h | 3 +- .../include/asm-xen/hypervisor.h | 25 +- .../sys/arch/xen/xen/if_xennet.c | 6 +- tools/libxc/xc_linux_build.c | 9 +- tools/libxc/xc_linux_restore.c | 15 +- tools/libxc/xc_plan9_build.c | 15 +- tools/libxc/xc_private.c | 46 +- tools/libxc/xc_private.h | 3 + tools/libxc/xc_vmx_build.c | 9 +- xen/arch/x86/mm.c | 880 ++++++++++-------- xen/arch/x86/x86_32/entry.S | 2 + xen/arch/x86/x86_64/entry.S | 1 + xen/include/public/xen.h | 137 +-- 26 files changed, 754 insertions(+), 641 deletions(-) diff --git a/freebsd-5.3-xen-sparse/i386-xen/i386-xen/xen_machdep.c b/freebsd-5.3-xen-sparse/i386-xen/i386-xen/xen_machdep.c index 874f87fdda..4fa020f531 100644 --- a/freebsd-5.3-xen-sparse/i386-xen/i386-xen/xen_machdep.c +++ 
b/freebsd-5.3-xen-sparse/i386-xen/i386-xen/xen_machdep.c @@ -540,7 +540,7 @@ mcl_queue_pt_update(vm_offset_t va, vm_paddr_t ma) MCL_QUEUE[MCL_IDX].op = __HYPERVISOR_update_va_mapping; MCL_QUEUE[MCL_IDX].args[0] = (unsigned long)va; MCL_QUEUE[MCL_IDX].args[1] = (unsigned long)ma; - MCL_QUEUE[MCL_IDX].args[2] = UVMF_INVLPG; + MCL_QUEUE[MCL_IDX].args[2] = UVMF_INVLPG_LOCAL; mcl_increment_idx(); } diff --git a/freebsd-5.3-xen-sparse/i386-xen/xen/netfront/xn_netfront.c b/freebsd-5.3-xen-sparse/i386-xen/xen/netfront/xn_netfront.c index 23e762e304..40d9e4636e 100644 --- a/freebsd-5.3-xen-sparse/i386-xen/xen/netfront/xn_netfront.c +++ b/freebsd-5.3-xen-sparse/i386-xen/xen/netfront/xn_netfront.c @@ -440,7 +440,7 @@ xn_alloc_rx_buffers(struct xn_softc *sc) PT_UPDATES_FLUSH(); /* After all PTEs have been zapped we blow away stale TLB entries. */ - xn_rx_mcl[i-1].args[2] = UVMF_FLUSH_TLB; + xn_rx_mcl[i-1].args[2] = UVMF_TLB_FLUSH_LOCAL; /* Give away a batch of pages. */ xn_rx_mcl[i].op = __HYPERVISOR_dom_mem_op; diff --git a/linux-2.4.29-xen-sparse/arch/xen/kernel/traps.c b/linux-2.4.29-xen-sparse/arch/xen/kernel/traps.c index ada06dd973..dc9220dfe0 100644 --- a/linux-2.4.29-xen-sparse/arch/xen/kernel/traps.c +++ b/linux-2.4.29-xen-sparse/arch/xen/kernel/traps.c @@ -316,15 +316,7 @@ asmlinkage void do_general_protection(struct pt_regs * regs, long error_code) __asm__ __volatile__ ( "sldt %0" : "=r" (ldt) ); if ( ldt == 0 ) { - mmu_update_t u; - u.ptr = MMU_EXTENDED_COMMAND; - u.ptr |= (unsigned long)&default_ldt[0]; - u.val = MMUEXT_SET_LDT | (5 << MMUEXT_CMD_SHIFT); - if ( unlikely(HYPERVISOR_mmu_update(&u, 1, NULL) < 0) ) - { - show_trace(NULL); - panic("Failed to install default LDT"); - } + xen_set_ldt((unsigned long)&default_ldt[0], 5); return; } } diff --git a/linux-2.4.29-xen-sparse/arch/xen/mm/ioremap.c b/linux-2.4.29-xen-sparse/arch/xen/mm/ioremap.c index 34c95c84b5..2f3db057d9 100644 --- a/linux-2.4.29-xen-sparse/arch/xen/mm/ioremap.c +++ 
b/linux-2.4.29-xen-sparse/arch/xen/mm/ioremap.c @@ -113,12 +113,7 @@ int direct_remap_area_pages(struct mm_struct *mm, int i; unsigned long start_address; #define MAX_DIRECTMAP_MMU_QUEUE 130 - mmu_update_t u[MAX_DIRECTMAP_MMU_QUEUE], *w, *v; - - u[0].ptr = MMU_EXTENDED_COMMAND; - u[0].val = MMUEXT_SET_FOREIGNDOM; - u[0].val |= (unsigned long)domid << 16; - v = w = &u[1]; + mmu_update_t u[MAX_DIRECTMAP_MMU_QUEUE], *v = u; start_address = address; @@ -130,11 +125,11 @@ int direct_remap_area_pages(struct mm_struct *mm, __direct_remap_area_pages( mm, start_address, address-start_address, - w); + u); - if ( HYPERVISOR_mmu_update(u, v - u, NULL) < 0 ) + if ( HYPERVISOR_mmu_update(u, v - u, NULL, domid) < 0 ) return -EFAULT; - v = w; + v = u; start_address = address; } @@ -149,14 +144,14 @@ int direct_remap_area_pages(struct mm_struct *mm, v++; } - if ( v != w ) + if ( v != u ) { /* get the ptep's filled in */ __direct_remap_area_pages(mm, start_address, address-start_address, - w); - if ( unlikely(HYPERVISOR_mmu_update(u, v - u, NULL) < 0) ) + u); + if ( unlikely(HYPERVISOR_mmu_update(u, v - u, NULL, domid) < 0) ) return -EFAULT; } diff --git a/linux-2.4.29-xen-sparse/mm/memory.c b/linux-2.4.29-xen-sparse/mm/memory.c index 880b6981c4..883a2928ab 100644 --- a/linux-2.4.29-xen-sparse/mm/memory.c +++ b/linux-2.4.29-xen-sparse/mm/memory.c @@ -911,7 +911,7 @@ static inline void establish_pte(struct vm_area_struct * vma, unsigned long addr { #ifdef CONFIG_XEN if ( likely(vma->vm_mm == current->mm) ) { - HYPERVISOR_update_va_mapping(address, entry, UVMF_INVLPG); + HYPERVISOR_update_va_mapping(address, entry, UVMF_INVLPG_LOCAL); } else { set_pte(page_table, entry); flush_tlb_page(vma, address); diff --git a/linux-2.6.11-xen-sparse/arch/xen/i386/kernel/traps.c b/linux-2.6.11-xen-sparse/arch/xen/i386/kernel/traps.c index f56957f6e6..a6615b7e18 100644 --- a/linux-2.6.11-xen-sparse/arch/xen/i386/kernel/traps.c +++ b/linux-2.6.11-xen-sparse/arch/xen/i386/kernel/traps.c @@ -465,14 
+465,7 @@ fastcall void do_general_protection(struct pt_regs * regs, long error_code) unsigned long ldt; __asm__ __volatile__ ("sldt %0" : "=r" (ldt)); if (ldt == 0) { - mmu_update_t u; - u.ptr = MMU_EXTENDED_COMMAND; - u.ptr |= (unsigned long)&default_ldt[0]; - u.val = MMUEXT_SET_LDT | (5 << MMUEXT_CMD_SHIFT); - if (unlikely(HYPERVISOR_mmu_update(&u, 1, NULL) < 0)) { - show_trace(NULL, (unsigned long *)&u); - panic("Failed to install default LDT"); - } + xen_set_ldt((unsigned long)&default_ldt[0], 5); return; } } diff --git a/linux-2.6.11-xen-sparse/arch/xen/i386/mm/hypervisor.c b/linux-2.6.11-xen-sparse/arch/xen/i386/mm/hypervisor.c index 525576243b..1ac796d9fd 100644 --- a/linux-2.6.11-xen-sparse/arch/xen/i386/mm/hypervisor.c +++ b/linux-2.6.11-xen-sparse/arch/xen/i386/mm/hypervisor.c @@ -52,7 +52,7 @@ void xen_l1_entry_update(pte_t *ptr, unsigned long val) mmu_update_t u; u.ptr = virt_to_machine(ptr); u.val = val; - BUG_ON(HYPERVISOR_mmu_update(&u, 1, NULL) < 0); + BUG_ON(HYPERVISOR_mmu_update(&u, 1, NULL, DOMID_SELF) < 0); } void xen_l2_entry_update(pmd_t *ptr, pmd_t val) @@ -60,79 +60,79 @@ void xen_l2_entry_update(pmd_t *ptr, pmd_t val) mmu_update_t u; u.ptr = virt_to_machine(ptr); u.val = pmd_val_ma(val); - BUG_ON(HYPERVISOR_mmu_update(&u, 1, NULL) < 0); + BUG_ON(HYPERVISOR_mmu_update(&u, 1, NULL, DOMID_SELF) < 0); } -void xen_pt_switch(unsigned long ptr) +void xen_machphys_update(unsigned long mfn, unsigned long pfn) { mmu_update_t u; - u.ptr = phys_to_machine(ptr) | MMU_EXTENDED_COMMAND; - u.val = MMUEXT_NEW_BASEPTR; - BUG_ON(HYPERVISOR_mmu_update(&u, 1, NULL) < 0); + u.ptr = (mfn << PAGE_SHIFT) | MMU_MACHPHYS_UPDATE; + u.val = pfn; + BUG_ON(HYPERVISOR_mmu_update(&u, 1, NULL, DOMID_SELF) < 0); +} + +void xen_pt_switch(unsigned long ptr) +{ + struct mmuext_op op; + op.cmd = MMUEXT_NEW_BASEPTR; + op.mfn = pfn_to_mfn(ptr >> PAGE_SHIFT); + BUG_ON(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0); } void xen_tlb_flush(void) { - mmu_update_t u; - u.ptr = 
MMU_EXTENDED_COMMAND; - u.val = MMUEXT_TLB_FLUSH; - BUG_ON(HYPERVISOR_mmu_update(&u, 1, NULL) < 0); + struct mmuext_op op; + op.cmd = MMUEXT_TLB_FLUSH_LOCAL; + BUG_ON(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0); } void xen_invlpg(unsigned long ptr) { - mmu_update_t u; - u.ptr = (ptr & PAGE_MASK) | MMU_EXTENDED_COMMAND; - u.val = MMUEXT_INVLPG; - BUG_ON(HYPERVISOR_mmu_update(&u, 1, NULL) < 0); + struct mmuext_op op; + op.cmd = MMUEXT_INVLPG_LOCAL; + op.linear_addr = ptr & PAGE_MASK; + BUG_ON(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0); } void xen_pgd_pin(unsigned long ptr) { - mmu_update_t u; - u.ptr = phys_to_machine(ptr) | MMU_EXTENDED_COMMAND; - u.val = MMUEXT_PIN_L2_TABLE; - BUG_ON(HYPERVISOR_mmu_update(&u, 1, NULL) < 0); + struct mmuext_op op; + op.cmd = MMUEXT_PIN_L2_TABLE; + op.mfn = pfn_to_mfn(ptr >> PAGE_SHIFT); + BUG_ON(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0); } void xen_pgd_unpin(unsigned long ptr) { - mmu_update_t u; - u.ptr = phys_to_machine(ptr) | MMU_EXTENDED_COMMAND; - u.val = MMUEXT_UNPIN_TABLE; - BUG_ON(HYPERVISOR_mmu_update(&u, 1, NULL) < 0); + struct mmuext_op op; + op.cmd = MMUEXT_UNPIN_TABLE; + op.mfn = pfn_to_mfn(ptr >> PAGE_SHIFT); + BUG_ON(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0); } void xen_pte_pin(unsigned long ptr) { - mmu_update_t u; - u.ptr = phys_to_machine(ptr) | MMU_EXTENDED_COMMAND; - u.val = MMUEXT_PIN_L1_TABLE; - BUG_ON(HYPERVISOR_mmu_update(&u, 1, NULL) < 0); + struct mmuext_op op; + op.cmd = MMUEXT_PIN_L1_TABLE; + op.mfn = pfn_to_mfn(ptr >> PAGE_SHIFT); + BUG_ON(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0); } void xen_pte_unpin(unsigned long ptr) { - mmu_update_t u; - u.ptr = phys_to_machine(ptr) | MMU_EXTENDED_COMMAND; - u.val = MMUEXT_UNPIN_TABLE; - BUG_ON(HYPERVISOR_mmu_update(&u, 1, NULL) < 0); + struct mmuext_op op; + op.cmd = MMUEXT_UNPIN_TABLE; + op.mfn = pfn_to_mfn(ptr >> PAGE_SHIFT); + BUG_ON(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0); } void xen_set_ldt(unsigned 
long ptr, unsigned long len) { - mmu_update_t u; - u.ptr = ptr | MMU_EXTENDED_COMMAND; - u.val = (len << MMUEXT_CMD_SHIFT) | MMUEXT_SET_LDT; - BUG_ON(HYPERVISOR_mmu_update(&u, 1, NULL) < 0); -} - -void xen_machphys_update(unsigned long mfn, unsigned long pfn) -{ - mmu_update_t u; - u.ptr = (mfn << PAGE_SHIFT) | MMU_MACHPHYS_UPDATE; - u.val = pfn; - BUG_ON(HYPERVISOR_mmu_update(&u, 1, NULL) < 0); + struct mmuext_op op; + op.cmd = MMUEXT_SET_LDT; + op.linear_addr = ptr; + op.nr_ents = len; + BUG_ON(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0); } #ifdef CONFIG_XEN_PHYSDEV_ACCESS diff --git a/linux-2.6.11-xen-sparse/arch/xen/i386/mm/ioremap.c b/linux-2.6.11-xen-sparse/arch/xen/i386/mm/ioremap.c index 8a0df417ed..7e58a230fc 100644 --- a/linux-2.6.11-xen-sparse/arch/xen/i386/mm/ioremap.c +++ b/linux-2.6.11-xen-sparse/arch/xen/i386/mm/ioremap.c @@ -108,7 +108,7 @@ void __iomem * __ioremap(unsigned long phys_addr, unsigned long size, unsigned l if(!PageReserved(page)) return NULL; - domid = DOMID_LOCAL; + domid = DOMID_SELF; } /* @@ -393,15 +393,7 @@ int direct_remap_area_pages(struct mm_struct *mm, int i; unsigned long start_address; #define MAX_DIRECTMAP_MMU_QUEUE 130 - mmu_update_t u[MAX_DIRECTMAP_MMU_QUEUE], *w, *v; - - v = w = &u[0]; - if (domid != DOMID_LOCAL) { - u[0].ptr = MMU_EXTENDED_COMMAND; - u[0].val = MMUEXT_SET_FOREIGNDOM; - u[0].val |= (unsigned long)domid << 16; - v = w = &u[1]; - } + mmu_update_t u[MAX_DIRECTMAP_MMU_QUEUE], *v = u; start_address = address; @@ -413,11 +405,11 @@ int direct_remap_area_pages(struct mm_struct *mm, __direct_remap_area_pages(mm, start_address, address-start_address, - w); + u); - if (HYPERVISOR_mmu_update(u, v - u, NULL) < 0) + if (HYPERVISOR_mmu_update(u, v - u, NULL, domid) < 0) return -EFAULT; - v = w; + v = u; start_address = address; } @@ -432,13 +424,13 @@ int direct_remap_area_pages(struct mm_struct *mm, v++; } - if (v != w) { + if (v != u) { /* get the ptep's filled in */ __direct_remap_area_pages(mm, 
start_address, address-start_address, - w); - if (unlikely(HYPERVISOR_mmu_update(u, v - u, NULL) < 0)) + u); + if (unlikely(HYPERVISOR_mmu_update(u, v - u, NULL, domid) < 0)) return -EFAULT; } diff --git a/linux-2.6.11-xen-sparse/drivers/xen/blkback/blkback.c b/linux-2.6.11-xen-sparse/drivers/xen/blkback/blkback.c index ab9fc3c977..21b9540f19 100644 --- a/linux-2.6.11-xen-sparse/drivers/xen/blkback/blkback.c +++ b/linux-2.6.11-xen-sparse/drivers/xen/blkback/blkback.c @@ -100,7 +100,7 @@ static void fast_flush_area(int idx, int nr_pages) mcl[i].args[2] = 0; } - mcl[nr_pages-1].args[2] = UVMF_FLUSH_TLB; + mcl[nr_pages-1].args[2] = UVMF_TLB_FLUSH_ALL; if ( unlikely(HYPERVISOR_multicall(mcl, nr_pages) != 0) ) BUG(); } diff --git a/linux-2.6.11-xen-sparse/drivers/xen/netback/netback.c b/linux-2.6.11-xen-sparse/drivers/xen/netback/netback.c index 248711646b..9079ea2d47 100644 --- a/linux-2.6.11-xen-sparse/drivers/xen/netback/netback.c +++ b/linux-2.6.11-xen-sparse/drivers/xen/netback/netback.c @@ -38,8 +38,9 @@ static DECLARE_TASKLET(net_rx_tasklet, net_rx_action, 0); static struct timer_list net_timer; static struct sk_buff_head rx_queue; -static multicall_entry_t rx_mcl[NETIF_RX_RING_SIZE*2]; -static mmu_update_t rx_mmu[NETIF_RX_RING_SIZE*3]; +static multicall_entry_t rx_mcl[NETIF_RX_RING_SIZE*2+1]; +static mmu_update_t rx_mmu[NETIF_RX_RING_SIZE]; +static struct mmuext_op rx_mmuext[NETIF_RX_RING_SIZE]; static unsigned char rx_notify[NR_EVENT_CHANNELS]; /* Don't currently gate addition of an interface to the tx scheduling list. 
*/ @@ -195,8 +196,9 @@ static void net_rx_action(unsigned long unused) netif_t *netif; s8 status; u16 size, id, evtchn; - mmu_update_t *mmu; multicall_entry_t *mcl; + mmu_update_t *mmu; + struct mmuext_op *mmuext; unsigned long vdata, mdata, new_mfn; struct sk_buff_head rxq; struct sk_buff *skb; @@ -207,6 +209,7 @@ static void net_rx_action(unsigned long unused) mcl = rx_mcl; mmu = rx_mmu; + mmuext = rx_mmuext; while ( (skb = skb_dequeue(&rx_queue)) != NULL ) { netif = netdev_priv(skb->dev); @@ -229,25 +232,26 @@ static void net_rx_action(unsigned long unused) */ phys_to_machine_mapping[__pa(skb->data) >> PAGE_SHIFT] = new_mfn; - mmu[0].ptr = (new_mfn << PAGE_SHIFT) | MMU_MACHPHYS_UPDATE; - mmu[0].val = __pa(vdata) >> PAGE_SHIFT; - mmu[1].ptr = MMU_EXTENDED_COMMAND; - mmu[1].val = MMUEXT_SET_FOREIGNDOM; - mmu[1].val |= (unsigned long)netif->domid << 16; - mmu[2].ptr = (mdata & PAGE_MASK) | MMU_EXTENDED_COMMAND; - mmu[2].val = MMUEXT_REASSIGN_PAGE; + mcl->op = __HYPERVISOR_update_va_mapping; + mcl->args[0] = vdata; + mcl->args[1] = (new_mfn << PAGE_SHIFT) | __PAGE_KERNEL; + mcl->args[2] = 0; + mcl++; - mcl[0].op = __HYPERVISOR_update_va_mapping; - mcl[0].args[0] = vdata; - mcl[0].args[1] = (new_mfn << PAGE_SHIFT) | __PAGE_KERNEL; - mcl[0].args[2] = 0; - mcl[1].op = __HYPERVISOR_mmu_update; - mcl[1].args[0] = (unsigned long)mmu; - mcl[1].args[1] = 3; - mcl[1].args[2] = 0; + mcl->op = __HYPERVISOR_mmuext_op; + mcl->args[0] = (unsigned long)mmuext; + mcl->args[1] = 1; + mcl->args[2] = 0; + mcl->args[3] = netif->domid; + mcl++; - mcl += 2; - mmu += 3; + mmuext->cmd = MMUEXT_REASSIGN_PAGE; + mmuext->mfn = mdata >> PAGE_SHIFT; + mmuext++; + + mmu->ptr = (new_mfn << PAGE_SHIFT) | MMU_MACHPHYS_UPDATE; + mmu->val = __pa(vdata) >> PAGE_SHIFT; + mmu++; __skb_queue_tail(&rxq, skb); @@ -259,12 +263,19 @@ static void net_rx_action(unsigned long unused) if ( mcl == rx_mcl ) return; - mcl[-2].args[2] = UVMF_FLUSH_TLB; + mcl->op = __HYPERVISOR_mmu_update; + mcl->args[0] = (unsigned 
long)rx_mmu; + mcl->args[1] = mmu - rx_mmu; + mcl->args[2] = 0; + mcl->args[3] = DOMID_SELF; + mcl++; + + mcl[-3].args[2] = UVMF_TLB_FLUSH_ALL; if ( unlikely(HYPERVISOR_multicall(rx_mcl, mcl - rx_mcl) != 0) ) BUG(); mcl = rx_mcl; - mmu = rx_mmu; + mmuext = rx_mmuext; while ( (skb = __skb_dequeue(&rxq)) != NULL ) { netif = netdev_priv(skb->dev); @@ -272,7 +283,7 @@ static void net_rx_action(unsigned long unused) /* Rederive the machine addresses. */ new_mfn = mcl[0].args[1] >> PAGE_SHIFT; - mdata = ((mmu[2].ptr & PAGE_MASK) | + mdata = ((mmuext[0].mfn << PAGE_SHIFT) | ((unsigned long)skb->data & ~PAGE_MASK)); atomic_set(&(skb_shinfo(skb)->dataref), 1); @@ -308,7 +319,7 @@ static void net_rx_action(unsigned long unused) dev_kfree_skb(skb); mcl += 2; - mmu += 3; + mmuext += 1; } while ( notify_nr != 0 ) @@ -418,7 +429,7 @@ static void net_tx_action(unsigned long unused) mcl++; } - mcl[-1].args[2] = UVMF_FLUSH_TLB; + mcl[-1].args[2] = UVMF_TLB_FLUSH_ALL; if ( unlikely(HYPERVISOR_multicall(tx_mcl, mcl - tx_mcl) != 0) ) BUG(); diff --git a/linux-2.6.11-xen-sparse/drivers/xen/netfront/netfront.c b/linux-2.6.11-xen-sparse/drivers/xen/netfront/netfront.c index 91fe4e5975..3b27381b9f 100644 --- a/linux-2.6.11-xen-sparse/drivers/xen/netfront/netfront.c +++ b/linux-2.6.11-xen-sparse/drivers/xen/netfront/netfront.c @@ -388,7 +388,7 @@ static void network_alloc_rx_buffers(struct net_device *dev) } /* After all PTEs have been zapped we blow away stale TLB entries. */ - rx_mcl[i-1].args[2] = UVMF_FLUSH_TLB; + rx_mcl[i-1].args[2] = UVMF_TLB_FLUSH_ALL; /* Give away a batch of pages. 
*/ rx_mcl[i].op = __HYPERVISOR_dom_mem_op; @@ -588,6 +588,7 @@ static int netif_poll(struct net_device *dev, int *pbudget) mcl->args[0] = (unsigned long)rx_mmu; mcl->args[1] = mmu - rx_mmu; mcl->args[2] = 0; + mcl->args[3] = DOMID_SELF; mcl++; (void)HYPERVISOR_multicall(rx_mcl, mcl - rx_mcl); } diff --git a/linux-2.6.11-xen-sparse/drivers/xen/privcmd/privcmd.c b/linux-2.6.11-xen-sparse/drivers/xen/privcmd/privcmd.c index 219b218920..017ed4a477 100644 --- a/linux-2.6.11-xen-sparse/drivers/xen/privcmd/privcmd.c +++ b/linux-2.6.11-xen-sparse/drivers/xen/privcmd/privcmd.c @@ -98,7 +98,6 @@ static int privcmd_ioctl(struct inode *inode, struct file *file, struct vm_area_struct *vma = find_vma( current->mm, msg[j].va ); - if ( !vma ) return -EINVAL; @@ -123,8 +122,7 @@ static int privcmd_ioctl(struct inode *inode, struct file *file, case IOCTL_PRIVCMD_MMAPBATCH: { -#define MAX_DIRECTMAP_MMU_QUEUE 130 - mmu_update_t u[MAX_DIRECTMAP_MMU_QUEUE], *w, *v; + mmu_update_t u; privcmd_mmapbatch_t m; struct vm_area_struct *vma = NULL; unsigned long *p, addr; @@ -145,39 +143,31 @@ static int privcmd_ioctl(struct inode *inode, struct file *file, if ( (m.addr + (m.num<<PAGE_SHIFT)) > vma->vm_end ) { ret = -EFAULT; goto batch_err; } - u[0].ptr = MMU_EXTENDED_COMMAND; - u[0].val = MMUEXT_SET_FOREIGNDOM; - u[0].val |= (unsigned long)m.dom << 16; - v = w = &u[1]; - p = m.arr; addr = m.addr; for ( i = 0; i < m.num; i++, addr += PAGE_SIZE, p++ ) { - if ( get_user(mfn, p) ) return -EFAULT; - v->val = (mfn << PAGE_SHIFT) | pgprot_val(vma->vm_page_prot); + u.val = (mfn << PAGE_SHIFT) | pgprot_val(vma->vm_page_prot); __direct_remap_area_pages(vma->vm_mm, addr, PAGE_SIZE, - v); - - if ( unlikely(HYPERVISOR_mmu_update(u, v - u + 1, NULL) < 0) ) - put_user( 0xF0000000 | mfn, p ); + &u); - v = w; + if ( unlikely(HYPERVISOR_mmu_update(&u, 1, NULL, m.dom) < 0) ) + put_user(0xF0000000 | mfn, p); } ret = 0; break; batch_err: - printk(KERN_ALERT "XXX SMH: ERROR IN MMAPBATCH\n"); printk("batch_err ret=%d vma=%p 
addr=%lx num=%d arr=%p %lx-%lx\n", - ret, vma, m.addr, m.num, m.arr, vma->vm_start, vma->vm_end); + ret, vma, m.addr, m.num, m.arr, + vma ? vma->vm_start : 0, vma ? vma->vm_end : 0); break; } break; diff --git a/linux-2.6.11-xen-sparse/drivers/xen/usbback/usbback.c b/linux-2.6.11-xen-sparse/drivers/xen/usbback/usbback.c index b039b4506b..72a6be3a2f 100644 --- a/linux-2.6.11-xen-sparse/drivers/xen/usbback/usbback.c +++ b/linux-2.6.11-xen-sparse/drivers/xen/usbback/usbback.c @@ -195,7 +195,7 @@ static void fast_flush_area(int idx, int nr_pages) mcl[i].args[2] = 0; } - mcl[nr_pages-1].args[2] = UVMF_FLUSH_TLB; + mcl[nr_pages-1].args[2] = UVMF_TLB_FLUSH_ALL; if ( unlikely(HYPERVISOR_multicall(mcl, nr_pages) != 0) ) BUG(); } diff --git a/linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/pgtable.h b/linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/pgtable.h index 325cf468b5..714a247de3 100644 --- a/linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/pgtable.h +++ b/linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/pgtable.h @@ -407,7 +407,7 @@ extern void noexec_setup(const char *str); do { \ if (__dirty) { \ if ( likely((__vma)->vm_mm == current->mm) ) { \ - HYPERVISOR_update_va_mapping((__address), (__entry), UVMF_INVLPG); \ + HYPERVISOR_update_va_mapping((__address), (__entry), UVMF_INVLPG_LOCAL); \ } else { \ xen_l1_entry_update((__ptep), (__entry).pte_low); \ flush_tlb_page((__vma), (__address)); \ @@ -455,7 +455,6 @@ void make_pages_writable(void *va, unsigned int nr); #define kern_addr_valid(addr) (1) #endif /* !CONFIG_DISCONTIGMEM */ -#define DOMID_LOCAL (0xFFFFU) int direct_remap_area_pages(struct mm_struct *mm, unsigned long address, unsigned long machine_addr, diff --git a/linux-2.6.11-xen-sparse/include/asm-xen/hypervisor.h b/linux-2.6.11-xen-sparse/include/asm-xen/hypervisor.h index 5fe90fb87f..9bc3e4997b 100644 --- a/linux-2.6.11-xen-sparse/include/asm-xen/hypervisor.h +++ b/linux-2.6.11-xen-sparse/include/asm-xen/hypervisor.h @@ -126,16 +126,33 @@ 
HYPERVISOR_set_trap_table( static inline int HYPERVISOR_mmu_update( - mmu_update_t *req, int count, int *success_count) + mmu_update_t *req, int count, int *success_count, domid_t domid) { int ret; - unsigned long ign1, ign2, ign3; + unsigned long ign1, ign2, ign3, ign4; __asm__ __volatile__ ( TRAP_INSTR - : "=a" (ret), "=b" (ign1), "=c" (ign2), "=d" (ign3) + : "=a" (ret), "=b" (ign1), "=c" (ign2), "=d" (ign3), "=S" (ign4) : "0" (__HYPERVISOR_mmu_update), "1" (req), "2" (count), - "3" (success_count) + "3" (success_count), "4" (domid) + : "memory" ); + + return ret; +} + +static inline int +HYPERVISOR_mmuext_op( + struct mmuext_op *op, int count, int *success_count, domid_t domid) +{ + int ret; + unsigned long ign1, ign2, ign3, ign4; + + __asm__ __volatile__ ( + TRAP_INSTR + : "=a" (ret), "=b" (ign1), "=c" (ign2), "=d" (ign3), "=S" (ign4) + : "0" (__HYPERVISOR_mmuext_op), "1" (op), "2" (count), + "3" (success_count), "4" (domid) : "memory" ); return ret; diff --git a/netbsd-2.0-xen-sparse/sys/arch/xen/xen/if_xennet.c b/netbsd-2.0-xen-sparse/sys/arch/xen/xen/if_xennet.c index e1063b1775..687b21080b 100644 --- a/netbsd-2.0-xen-sparse/sys/arch/xen/xen/if_xennet.c +++ b/netbsd-2.0-xen-sparse/sys/arch/xen/xen/if_xennet.c @@ -598,7 +598,7 @@ xennet_rx_push_buffer(struct xennet_softc *sc, int id) xpq_flush_queue(); /* After all PTEs have been zapped we blow away stale TLB entries. */ - rx_mcl[nr_pfns-1].args[2] = UVMF_FLUSH_TLB; + rx_mcl[nr_pfns-1].args[2] = UVMF_TLB_FLUSH_LOCAL; /* Give away a batch of pages. 
*/ rx_mcl[nr_pfns].op = __HYPERVISOR_dom_mem_op; @@ -681,7 +681,7 @@ xen_network_handler(void *arg) mcl->op = __HYPERVISOR_update_va_mapping; mcl->args[0] = sc->sc_rx_bufa[rx->id].xb_rx.xbrx_va; mcl->args[1] = (rx->addr & PG_FRAME) | PG_V|PG_KW; - mcl->args[2] = UVMF_FLUSH_TLB; // 0; + mcl->args[2] = UVMF_TLB_FLUSH_LOCAL; // 0; mcl++; xpmap_phys_to_machine_mapping @@ -898,7 +898,7 @@ network_alloc_rx_buffers(struct xennet_softc *sc) xpq_flush_queue(); /* After all PTEs have been zapped we blow away stale TLB entries. */ - rx_mcl[nr_pfns-1].args[2] = UVMF_FLUSH_TLB; + rx_mcl[nr_pfns-1].args[2] = UVMF_TLB_FLUSH_LOCAL; /* Give away a batch of pages. */ rx_mcl[nr_pfns].op = __HYPERVISOR_dom_mem_op; diff --git a/tools/libxc/xc_linux_build.c b/tools/libxc/xc_linux_build.c index 5e3d474ab0..469861b786 100644 --- a/tools/libxc/xc_linux_build.c +++ b/tools/libxc/xc_linux_build.c @@ -254,8 +254,7 @@ static int setup_guest(int xc_handle, * Pin down l2tab addr as page dir page - causes hypervisor to provide * correct protection for the page */ - if ( add_mmu_update(xc_handle, mmu, - l2tab | MMU_EXTENDED_COMMAND, MMUEXT_PIN_L2_TABLE) ) + if ( pin_table(xc_handle, MMUEXT_PIN_L2_TABLE, l2tab>>PAGE_SHIFT, dom) ) goto error_out; start_info = xc_map_foreign_range( @@ -447,10 +446,16 @@ int xc_linux_build(int xc_handle, memset(ctxt->debugreg, 0, sizeof(ctxt->debugreg)); /* No callback handlers. 
*/ +#if defined(__i386__) ctxt->event_callback_cs = FLAT_KERNEL_CS; ctxt->event_callback_eip = 0; ctxt->failsafe_callback_cs = FLAT_KERNEL_CS; ctxt->failsafe_callback_eip = 0; +#elif defined(__x86_64__) + ctxt->event_callback_eip = 0; + ctxt->failsafe_callback_eip = 0; + ctxt->syscall_callback_eip = 0; +#endif memset( &launch_op, 0, sizeof(launch_op) ); diff --git a/tools/libxc/xc_linux_restore.c b/tools/libxc/xc_linux_restore.c index 938f219ec7..220890d346 100644 --- a/tools/libxc/xc_linux_restore.c +++ b/tools/libxc/xc_linux_restore.c @@ -422,10 +422,8 @@ int xc_linux_restore(int xc_handle, XcIOContext *ioctxt) { if ( pfn_type[i] == (L1TAB|LPINTAB) ) { - if ( add_mmu_update(xc_handle, mmu, - (pfn_to_mfn_table[i]< 8192) || (ctxt.ldt_base > HYPERVISOR_VIRT_START) || diff --git a/tools/libxc/xc_plan9_build.c b/tools/libxc/xc_plan9_build.c index c6778d44bc..3476136196 100755 --- a/tools/libxc/xc_plan9_build.c +++ b/tools/libxc/xc_plan9_build.c @@ -314,8 +314,7 @@ setup_guest(int xc_handle, * Pin down l2tab addr as page dir page - causes hypervisor to provide * correct protection for the page */ - if (add_mmu_update(xc_handle, mmu, - l2tab | MMU_EXTENDED_COMMAND, MMUEXT_PIN_L2_TABLE)) + if (pin_table(xc_handle, MMUEXT_PIN_L2_TABLE, l2tab>>PAGE_SHIFT, dom)) goto error_out; for (count = 0; count < tot_pages; count++) { @@ -526,10 +525,16 @@ xc_plan9_build(int xc_handle, memset(ctxt->debugreg, 0, sizeof (ctxt->debugreg)); /* No callback handlers. 
*/ - ctxt->event_callback_cs = FLAT_KERNEL_CS; - ctxt->event_callback_eip = 0; - ctxt->failsafe_callback_cs = FLAT_KERNEL_CS; +#if defined(__i386__) + ctxt->event_callback_cs = FLAT_KERNEL_CS; + ctxt->event_callback_eip = 0; + ctxt->failsafe_callback_cs = FLAT_KERNEL_CS; + ctxt->failsafe_callback_eip = 0; +#elif defined(__x86_64__) + ctxt->event_callback_eip = 0; ctxt->failsafe_callback_eip = 0; + ctxt->syscall_callback_eip = 0; +#endif memset(&launch_op, 0, sizeof (launch_op)); diff --git a/tools/libxc/xc_private.c b/tools/libxc/xc_private.c index 37342ac837..386a240178 100644 --- a/tools/libxc/xc_private.c +++ b/tools/libxc/xc_private.c @@ -92,24 +92,54 @@ unsigned int get_pfn_type(int xc_handle, /*******************/ -#define FIRST_MMU_UPDATE 1 +int pin_table( + int xc_handle, unsigned int type, unsigned long mfn, domid_t dom) +{ + int err = 0; + struct mmuext_op op; + privcmd_hypercall_t hypercall; + + op.cmd = type; + op.mfn = mfn; + + hypercall.op = __HYPERVISOR_mmuext_op; + hypercall.arg[0] = (unsigned long)&op; + hypercall.arg[1] = 1; + hypercall.arg[2] = 0; + hypercall.arg[3] = dom; + + if ( mlock(&op, sizeof(op)) != 0 ) + { + PERROR("Could not lock mmuext_op"); + err = 1; + goto out; + } + + if ( do_xen_hypercall(xc_handle, &hypercall) < 0 ) + { + ERROR("Failure when submitting mmu updates"); + err = 1; + } + + (void)munlock(&op, sizeof(op)); + + out: + return err; +} static int flush_mmu_updates(int xc_handle, mmu_t *mmu) { int err = 0; privcmd_hypercall_t hypercall; - if ( mmu->idx == FIRST_MMU_UPDATE ) + if ( mmu->idx == 0 ) return 0; - mmu->updates[0].ptr = MMU_EXTENDED_COMMAND; - mmu->updates[0].val = MMUEXT_SET_FOREIGNDOM; - mmu->updates[0].val |= (unsigned long)mmu->subject << 16; - hypercall.op = __HYPERVISOR_mmu_update; hypercall.arg[0] = (unsigned long)mmu->updates; hypercall.arg[1] = (unsigned long)mmu->idx; hypercall.arg[2] = 0; + hypercall.arg[3] = mmu->subject; if ( mlock(mmu->updates, sizeof(mmu->updates)) != 0 ) { @@ -124,7 +154,7 @@ 
static int flush_mmu_updates(int xc_handle, mmu_t *mmu) err = 1; } - mmu->idx = FIRST_MMU_UPDATE; + mmu->idx = 0; (void)munlock(mmu->updates, sizeof(mmu->updates)); @@ -137,7 +167,7 @@ mmu_t *init_mmu_updates(int xc_handle, domid_t dom) mmu_t *mmu = malloc(sizeof(mmu_t)); if ( mmu == NULL ) return mmu; - mmu->idx = FIRST_MMU_UPDATE; + mmu->idx = 0; mmu->subject = dom; return mmu; } diff --git a/tools/libxc/xc_private.h b/tools/libxc/xc_private.h index b3ad75375a..78f8bbe194 100644 --- a/tools/libxc/xc_private.h +++ b/tools/libxc/xc_private.h @@ -212,4 +212,7 @@ void xc_map_memcpy(unsigned long dst, char *src, unsigned long size, int xch, u32 dom, unsigned long *parray, unsigned long vstart); +int pin_table( + int xc_handle, unsigned int type, unsigned long mfn, domid_t dom); + #endif /* __XC_PRIVATE_H__ */ diff --git a/tools/libxc/xc_vmx_build.c b/tools/libxc/xc_vmx_build.c index b1937b76ba..d1692e61f8 100644 --- a/tools/libxc/xc_vmx_build.c +++ b/tools/libxc/xc_vmx_build.c @@ -333,8 +333,7 @@ static int setup_guest(int xc_handle, * Pin down l2tab addr as page dir page - causes hypervisor to provide * correct protection for the page */ - if ( add_mmu_update(xc_handle, mmu, - l2tab | MMU_EXTENDED_COMMAND, MMUEXT_PIN_L2_TABLE) ) + if ( pin_table(xc_handle, MMUEXT_PIN_L2_TABLE, l2tab>>PAGE_SHIFT, dom) ) goto error_out; if ((boot_paramsp = xc_map_foreign_range( @@ -612,10 +611,16 @@ int xc_vmx_build(int xc_handle, memset(ctxt->debugreg, 0, sizeof(ctxt->debugreg)); /* No callback handlers. 
*/ +#if defined(__i386__) ctxt->event_callback_cs = FLAT_KERNEL_CS; ctxt->event_callback_eip = 0; ctxt->failsafe_callback_cs = FLAT_KERNEL_CS; ctxt->failsafe_callback_eip = 0; +#elif defined(__x86_64__) + ctxt->event_callback_eip = 0; + ctxt->failsafe_callback_eip = 0; + ctxt->syscall_callback_eip = 0; +#endif memset( &launch_op, 0, sizeof(launch_op) ); diff --git a/xen/arch/x86/mm.c b/xen/arch/x86/mm.c index c76dd791bc..946ea05e03 100644 --- a/xen/arch/x86/mm.c +++ b/xen/arch/x86/mm.c @@ -111,6 +111,13 @@ #define MEM_LOG(_f, _a...) ((void)0) #endif +/* + * Both do_mmuext_op() and do_mmu_update(): + * We steal the m.s.b. of the @count parameter to indicate whether this + * invocation of do_mmu_update() is resuming a previously preempted call. + */ +#define MMU_UPDATE_PREEMPTED (~(~0U>>1)) + static int alloc_l2_table(struct pfn_info *page); static int alloc_l1_table(struct pfn_info *page); static int get_page_from_pagenr(unsigned long page_nr, struct domain *d); @@ -128,7 +135,7 @@ static int mod_l1_entry(l1_pgentry_t *, l1_pgentry_t); static struct { #define DOP_FLUSH_TLB (1<<0) /* Flush the TLB. */ #define DOP_RELOAD_LDT (1<<1) /* Reload the LDT shadow mapping. */ - unsigned long deferred_ops; + unsigned int deferred_ops; /* If non-NULL, specifies a foreign subject domain for some operations. 
*/ struct domain *foreign; } __cacheline_aligned percpu_info[NR_CPUS]; @@ -199,12 +206,16 @@ void write_ptbase(struct exec_domain *ed) write_cr3(pagetable_val(ed->arch.monitor_table)); } -static void __invalidate_shadow_ldt(struct exec_domain *d) + +static inline void invalidate_shadow_ldt(struct exec_domain *d) { int i; unsigned long pfn; struct pfn_info *page; + if ( d->arch.shadow_ldt_mapcnt == 0 ) + return; + d->arch.shadow_ldt_mapcnt = 0; for ( i = 16; i < 32; i++ ) @@ -223,13 +234,6 @@ static void __invalidate_shadow_ldt(struct exec_domain *d) } -static inline void invalidate_shadow_ldt(struct exec_domain *d) -{ - if ( d->arch.shadow_ldt_mapcnt != 0 ) - __invalidate_shadow_ldt(d); -} - - static int alloc_segdesc_page(struct pfn_info *page) { struct desc_struct *descs; @@ -1251,401 +1255,409 @@ int new_guest_cr3(unsigned long pfn) return okay; } -static int do_extended_command(unsigned long ptr, unsigned long val) +static void process_deferred_ops(unsigned int cpu) { - int okay = 1, cpu = smp_processor_id(); - unsigned int cmd = val & MMUEXT_CMD_MASK, type; - unsigned long pfn = ptr >> PAGE_SHIFT; - struct pfn_info *page = &frame_table[pfn]; - struct exec_domain *ed = current; - struct domain *d = ed->domain, *e; - u32 x, y, _d, _nd; - domid_t domid; - grant_ref_t gntref; - - switch ( cmd ) - { - case MMUEXT_PIN_L1_TABLE: - /* - * We insist that, if you pin an L1 page, it's the first thing that - * you do to it. This is because we require the backptr to still be - * mutable. This assumption seems safe. 
- */ - type = PGT_l1_page_table | PGT_va_mutable; - - pin_page: - okay = get_page_and_type_from_pagenr(pfn, type, FOREIGNDOM); - if ( unlikely(!okay) ) - { - MEM_LOG("Error while pinning pfn %p", pfn); - break; - } - - if ( unlikely(test_and_set_bit(_PGT_pinned, - &page->u.inuse.type_info)) ) - { - MEM_LOG("Pfn %p already pinned", pfn); - put_page_and_type(page); - okay = 0; - break; - } - - break; + unsigned int deferred_ops; - case MMUEXT_PIN_L2_TABLE: - type = PGT_l2_page_table; - goto pin_page; + deferred_ops = percpu_info[cpu].deferred_ops; + percpu_info[cpu].deferred_ops = 0; -#ifdef __x86_64__ - case MMUEXT_PIN_L3_TABLE: - type = PGT_l3_page_table; - goto pin_page; + if ( deferred_ops & DOP_FLUSH_TLB ) + local_flush_tlb(); + + if ( deferred_ops & DOP_RELOAD_LDT ) + (void)map_ldt_shadow_page(0); - case MMUEXT_PIN_L4_TABLE: - type = PGT_l4_page_table; - goto pin_page; -#endif /* __x86_64__ */ + if ( unlikely(percpu_info[cpu].foreign != NULL) ) + { + put_domain(percpu_info[cpu].foreign); + percpu_info[cpu].foreign = NULL; + } +} - case MMUEXT_UNPIN_TABLE: - if ( unlikely(!(okay = get_page_from_pagenr(pfn, FOREIGNDOM))) ) - { - MEM_LOG("Page %p bad domain (dom=%p)", - ptr, page_get_owner(page)); - } - else if ( likely(test_and_clear_bit(_PGT_pinned, - &page->u.inuse.type_info)) ) - { - put_page_and_type(page); - put_page(page); - } - else - { - okay = 0; - put_page(page); - MEM_LOG("Pfn %p not pinned", pfn); - } - break; +static int set_foreigndom(unsigned int cpu, domid_t domid) +{ + struct domain *e, *d = current->domain; + int okay = 1; - case MMUEXT_NEW_BASEPTR: - okay = new_guest_cr3(pfn); - break; - -#ifdef __x86_64__ - case MMUEXT_NEW_USER_BASEPTR: - okay = get_page_and_type_from_pagenr(pfn, PGT_root_page_table, d); - if ( unlikely(!okay) ) - { - MEM_LOG("Error while installing new baseptr %p", pfn); - } - else - { - unsigned long old_pfn = - pagetable_val(ed->arch.guest_table_user) >> PAGE_SHIFT; - ed->arch.guest_table_user = mk_pagetable(pfn << 
PAGE_SHIFT); - if ( old_pfn != 0 ) - put_page_and_type(&frame_table[old_pfn]); - } - break; -#endif - - case MMUEXT_TLB_FLUSH: - percpu_info[cpu].deferred_ops |= DOP_FLUSH_TLB; - break; + if ( (e = percpu_info[cpu].foreign) != NULL ) + put_domain(e); + percpu_info[cpu].foreign = NULL; - case MMUEXT_INVLPG: - __flush_tlb_one(ptr); - break; - - case MMUEXT_FLUSH_CACHE: - if ( unlikely(!IS_CAPABLE_PHYSDEV(d)) ) - { - MEM_LOG("Non-physdev domain tried to FLUSH_CACHE.\n"); - okay = 0; - } - else - { - wbinvd(); - } - break; + if ( domid == DOMID_SELF ) + goto out; - case MMUEXT_SET_LDT: + if ( !IS_PRIV(d) ) { - unsigned long ents = val >> MMUEXT_CMD_SHIFT; - if ( ((ptr & (PAGE_SIZE-1)) != 0) || - (ents > 8192) || - ((ptr+ents*LDT_ENTRY_SIZE) < ptr) || - ((ptr+ents*LDT_ENTRY_SIZE) > PAGE_OFFSET) ) + switch ( domid ) { + case DOMID_IO: + get_knownalive_domain(dom_io); + percpu_info[cpu].foreign = dom_io; + break; + default: + MEM_LOG("Dom %u cannot set foreign dom\n", d->id); okay = 0; - MEM_LOG("Bad args to SET_LDT: ptr=%p, ents=%p", ptr, ents); - } - else if ( (ed->arch.ldt_ents != ents) || - (ed->arch.ldt_base != ptr) ) - { - invalidate_shadow_ldt(ed); - ed->arch.ldt_base = ptr; - ed->arch.ldt_ents = ents; - load_LDT(ed); - percpu_info[cpu].deferred_ops &= ~DOP_RELOAD_LDT; - if ( ents != 0 ) - percpu_info[cpu].deferred_ops |= DOP_RELOAD_LDT; + break; } - break; } - - case MMUEXT_SET_FOREIGNDOM: - domid = (domid_t)(val >> 16); - - if ( (e = percpu_info[cpu].foreign) != NULL ) - put_domain(e); - percpu_info[cpu].foreign = NULL; - - if ( !IS_PRIV(d) ) + else + { + percpu_info[cpu].foreign = e = find_domain_by_id(domid); + if ( e == NULL ) { switch ( domid ) { + case DOMID_XEN: + get_knownalive_domain(dom_xen); + percpu_info[cpu].foreign = dom_xen; + break; case DOMID_IO: get_knownalive_domain(dom_io); percpu_info[cpu].foreign = dom_io; break; default: - MEM_LOG("Dom %u cannot set foreign dom\n", d->id); + MEM_LOG("Unknown domain '%u'", domid); okay = 0; break; } } - else 
+ } + + out: + return okay; +} + +int do_mmuext_op( + struct mmuext_op *uops, + unsigned int count, + unsigned int *pdone, + unsigned int foreigndom) +{ + struct mmuext_op op; + int rc = 0, i = 0, okay, cpu = smp_processor_id(); + unsigned int type, done = 0; + struct pfn_info *page; + struct exec_domain *ed = current; + struct domain *d = ed->domain, *e; + u32 x, y, _d, _nd; + + LOCK_BIGLOCK(d); + + cleanup_writable_pagetable(d); + + if ( unlikely(count & MMU_UPDATE_PREEMPTED) ) + { + count &= ~MMU_UPDATE_PREEMPTED; + if ( unlikely(pdone != NULL) ) + (void)get_user(done, pdone); + } + + if ( !set_foreigndom(cpu, foreigndom) ) + { + rc = -EINVAL; + goto out; + } + + if ( unlikely(!array_access_ok(VERIFY_READ, uops, count, sizeof(op))) ) + { + rc = -EFAULT; + goto out; + } + + for ( i = 0; i < count; i++ ) + { + if ( hypercall_preempt_check() ) { - percpu_info[cpu].foreign = e = find_domain_by_id(domid); - if ( e == NULL ) - { - switch ( domid ) - { - case DOMID_XEN: - get_knownalive_domain(dom_xen); - percpu_info[cpu].foreign = dom_xen; - break; - case DOMID_IO: - get_knownalive_domain(dom_io); - percpu_info[cpu].foreign = dom_io; - break; - default: - MEM_LOG("Unknown domain '%u'", domid); - okay = 0; - break; - } - } + rc = hypercall4_create_continuation( + __HYPERVISOR_mmuext_op, uops, + (count - i) | MMU_UPDATE_PREEMPTED, pdone, foreigndom); + break; } - break; - case MMUEXT_TRANSFER_PAGE: - domid = (domid_t)(val >> 16); - gntref = (grant_ref_t)((val & 0xFF00) | ((ptr >> 2) & 0x00FF)); - - if ( unlikely(IS_XEN_HEAP_FRAME(page)) || - unlikely(!pfn_is_ram(pfn)) || - unlikely((e = find_domain_by_id(domid)) == NULL) ) + if ( unlikely(__copy_from_user(&op, uops, sizeof(op)) != 0) ) { - MEM_LOG("Bad frame (%p) or bad domid (%d).\n", pfn, domid); - okay = 0; + MEM_LOG("Bad __copy_from_user"); + rc = -EFAULT; break; } - spin_lock(&d->page_alloc_lock); + okay = 1; + page = &frame_table[op.mfn]; - /* - * The tricky bit: atomically release ownership while there is just 
one - * benign reference to the page (PGC_allocated). If that reference - * disappears then the deallocation routine will safely spin. - */ - _d = pickle_domptr(d); - _nd = page->u.inuse._domain; - y = page->count_info; - do { - x = y; - if ( unlikely((x & (PGC_count_mask|PGC_allocated)) != - (1|PGC_allocated)) || - unlikely(_nd != _d) ) + switch ( op.cmd ) + { + case MMUEXT_PIN_L1_TABLE: + /* + * We insist that, if you pin an L1 page, it's the first thing that + * you do to it. This is because we require the backptr to still be + * mutable. This assumption seems safe. + */ + type = PGT_l1_page_table | PGT_va_mutable; + + pin_page: + okay = get_page_and_type_from_pagenr(op.mfn, type, FOREIGNDOM); + if ( unlikely(!okay) ) { - MEM_LOG("Bad page values %p: ed=%p(%u), sd=%p," - " caf=%08x, taf=%08x\n", page_to_pfn(page), - d, d->id, unpickle_domptr(_nd), x, - page->u.inuse.type_info); - spin_unlock(&d->page_alloc_lock); - put_domain(e); - return 0; + MEM_LOG("Error while pinning MFN %p", op.mfn); + break; } - __asm__ __volatile__( - LOCK_PREFIX "cmpxchg8b %2" - : "=d" (_nd), "=a" (y), - "=m" (*(volatile u64 *)(&page->count_info)) - : "0" (_d), "1" (x), "c" (NULL), "b" (x) ); - } - while ( unlikely(_nd != _d) || unlikely(y != x) ); + + if ( unlikely(test_and_set_bit(_PGT_pinned, + &page->u.inuse.type_info)) ) + { + MEM_LOG("MFN %p already pinned", op.mfn); + put_page_and_type(page); + okay = 0; + break; + } + + break; - /* - * Unlink from 'd'. At least one reference remains (now anonymous), so - * noone else is spinning to try to delete this page from 'd'. - */ - d->tot_pages--; - list_del(&page->list); - - spin_unlock(&d->page_alloc_lock); + case MMUEXT_PIN_L2_TABLE: + type = PGT_l2_page_table; + goto pin_page; - spin_lock(&e->page_alloc_lock); +#ifdef __x86_64__ + case MMUEXT_PIN_L3_TABLE: + type = PGT_l3_page_table; + goto pin_page; - /* - * Check that 'e' will accept the page and has reservation headroom. 
- * Also, a domain mustn't have PGC_allocated pages when it is dying. - */ - ASSERT(e->tot_pages <= e->max_pages); - if ( unlikely(test_bit(DF_DYING, &e->d_flags)) || - unlikely(e->tot_pages == e->max_pages) || - unlikely(!gnttab_prepare_for_transfer(e, d, gntref)) ) - { - MEM_LOG("Transferee has no reservation headroom (%d,%d), or " - "provided a bad grant ref, or is dying (%p).\n", - e->tot_pages, e->max_pages, e->d_flags); - spin_unlock(&e->page_alloc_lock); - put_domain(e); - okay = 0; + case MMUEXT_PIN_L4_TABLE: + type = PGT_l4_page_table; + goto pin_page; +#endif /* __x86_64__ */ + + case MMUEXT_UNPIN_TABLE: + if ( unlikely(!(okay = get_page_from_pagenr(op.mfn, FOREIGNDOM))) ) + { + MEM_LOG("MFN %p bad domain (dom=%p)", + op.mfn, page_get_owner(page)); + } + else if ( likely(test_and_clear_bit(_PGT_pinned, + &page->u.inuse.type_info)) ) + { + put_page_and_type(page); + put_page(page); + } + else + { + okay = 0; + put_page(page); + MEM_LOG("MFN %p not pinned", op.mfn); + } break; - } - /* Okay, add the page to 'e'. 
*/ - if ( unlikely(e->tot_pages++ == 0) ) - get_knownalive_domain(e); - list_add_tail(&page->list, &e->page_list); - page_set_owner(page, e); + case MMUEXT_NEW_BASEPTR: + okay = new_guest_cr3(op.mfn); + break; + +#ifdef __x86_64__ + case MMUEXT_NEW_USER_BASEPTR: + okay = get_page_and_type_from_pagenr( + op.mfn, PGT_root_page_table, d); + if ( unlikely(!okay) ) + { + MEM_LOG("Error while installing new MFN %p", op.mfn); + } + else + { + unsigned long old_mfn = + pagetable_val(ed->arch.guest_table_user) >> PAGE_SHIFT; + ed->arch.guest_table_user = mk_pagetable(op.mfn << PAGE_SHIFT); + if ( old_mfn != 0 ) + put_page_and_type(&frame_table[old_mfn]); + } + break; +#endif + + case MMUEXT_TLB_FLUSH_LOCAL: + percpu_info[cpu].deferred_ops |= DOP_FLUSH_TLB; + break; + + case MMUEXT_INVLPG_LOCAL: + __flush_tlb_one(op.linear_addr); + break; - spin_unlock(&e->page_alloc_lock); + case MMUEXT_TLB_FLUSH_MULTI: + flush_tlb_mask(d->cpuset); /* XXX KAF XXX */ + break; + + case MMUEXT_INVLPG_MULTI: + flush_tlb_mask(d->cpuset); /* XXX KAF XXX */ + break; - /* Transfer is all done: tell the guest about its new page frame. 
*/ - gnttab_notify_transfer(e, gntref, pfn); - - put_domain(e); - break; + case MMUEXT_TLB_FLUSH_ALL: + flush_tlb_mask(d->cpuset); + break; + + case MMUEXT_INVLPG_ALL: + flush_tlb_mask(d->cpuset); /* XXX KAF XXX */ + break; - case MMUEXT_REASSIGN_PAGE: - if ( unlikely(!IS_PRIV(d)) ) - { - MEM_LOG("Dom %u has no reassignment priv", d->id); - okay = 0; + case MMUEXT_FLUSH_CACHE: + if ( unlikely(!IS_CAPABLE_PHYSDEV(d)) ) + { + MEM_LOG("Non-physdev domain tried to FLUSH_CACHE.\n"); + okay = 0; + } + else + { + wbinvd(); + } break; - } - e = percpu_info[cpu].foreign; - if ( unlikely(e == NULL) ) + case MMUEXT_SET_LDT: { - MEM_LOG("No FOREIGNDOM to reassign pfn %p to", pfn); - okay = 0; + unsigned long ptr = op.linear_addr; + unsigned long ents = op.nr_ents; + if ( ((ptr & (PAGE_SIZE-1)) != 0) || + (ents > 8192) || + ((ptr+ents*LDT_ENTRY_SIZE) < ptr) || + ((ptr+ents*LDT_ENTRY_SIZE) > PAGE_OFFSET) ) + { + okay = 0; + MEM_LOG("Bad args to SET_LDT: ptr=%p, ents=%p", ptr, ents); + } + else if ( (ed->arch.ldt_ents != ents) || + (ed->arch.ldt_base != ptr) ) + { + invalidate_shadow_ldt(ed); + ed->arch.ldt_base = ptr; + ed->arch.ldt_ents = ents; + load_LDT(ed); + percpu_info[cpu].deferred_ops &= ~DOP_RELOAD_LDT; + if ( ents != 0 ) + percpu_info[cpu].deferred_ops |= DOP_RELOAD_LDT; + } break; } - /* - * Grab both page_list locks, in order. This prevents the page from - * disappearing elsewhere while we modify the owner, and we'll need - * both locks if we're successful so that we can change lists. - */ - if ( d < e ) - { - spin_lock(&d->page_alloc_lock); - spin_lock(&e->page_alloc_lock); - } - else - { - spin_lock(&e->page_alloc_lock); - spin_lock(&d->page_alloc_lock); - } - - /* A domain shouldn't have PGC_allocated pages when it is dying. 
*/ - if ( unlikely(test_bit(DF_DYING, &e->d_flags)) || - unlikely(IS_XEN_HEAP_FRAME(page)) ) - { - MEM_LOG("Reassignment page is Xen heap, or dest dom is dying."); - okay = 0; - goto reassign_fail; - } - - /* - * The tricky bit: atomically change owner while there is just one - * benign reference to the page (PGC_allocated). If that reference - * disappears then the deallocation routine will safely spin. - */ - _d = pickle_domptr(d); - _nd = page->u.inuse._domain; - y = page->count_info; - do { - x = y; - if ( unlikely((x & (PGC_count_mask|PGC_allocated)) != - (1|PGC_allocated)) || - unlikely(_nd != _d) ) + case MMUEXT_REASSIGN_PAGE: + if ( unlikely(!IS_PRIV(d)) ) { - MEM_LOG("Bad page values %p: ed=%p(%u), sd=%p," - " caf=%08x, taf=%08x\n", page_to_pfn(page), - d, d->id, unpickle_domptr(_nd), x, - page->u.inuse.type_info); + MEM_LOG("Dom %u has no reassignment priv", d->id); + okay = 0; + break; + } + + e = percpu_info[cpu].foreign; + if ( unlikely(e == NULL) ) + { + MEM_LOG("No FOREIGNDOM to reassign MFN %p to", op.mfn); + okay = 0; + break; + } + + /* + * Grab both page_list locks, in order. This prevents the page from + * disappearing elsewhere while we modify the owner, and we'll need + * both locks if we're successful so that we can change lists. + */ + if ( d < e ) + { + spin_lock(&d->page_alloc_lock); + spin_lock(&e->page_alloc_lock); + } + else + { + spin_lock(&e->page_alloc_lock); + spin_lock(&d->page_alloc_lock); + } + + /* A domain shouldn't have PGC_allocated pages when it is dying. */ + if ( unlikely(test_bit(DF_DYING, &e->d_flags)) || + unlikely(IS_XEN_HEAP_FRAME(page)) ) + { + MEM_LOG("Reassign page is Xen heap, or dest dom is dying."); okay = 0; goto reassign_fail; } - __asm__ __volatile__( - LOCK_PREFIX "cmpxchg8b %3" - : "=d" (_nd), "=a" (y), "=c" (e), - "=m" (*(volatile u64 *)(&page->count_info)) - : "0" (_d), "1" (x), "c" (e), "b" (x) ); - } - while ( unlikely(_nd != _d) || unlikely(y != x) ); - - /* - * Unlink from 'd'. 
We transferred at least one reference to 'e', so - * noone else is spinning to try to delete this page from 'd'. - */ - d->tot_pages--; - list_del(&page->list); - - /* - * Add the page to 'e'. Someone may already have removed the last - * reference and want to remove the page from 'e'. However, we have - * the lock so they'll spin waiting for us. - */ - if ( unlikely(e->tot_pages++ == 0) ) - get_knownalive_domain(e); - list_add_tail(&page->list, &e->page_list); - reassign_fail: - spin_unlock(&d->page_alloc_lock); - spin_unlock(&e->page_alloc_lock); - break; + /* + * The tricky bit: atomically change owner while there is just one + * benign reference to the page (PGC_allocated). If that reference + * disappears then the deallocation routine will safely spin. + */ + _d = pickle_domptr(d); + _nd = page->u.inuse._domain; + y = page->count_info; + do { + x = y; + if ( unlikely((x & (PGC_count_mask|PGC_allocated)) != + (1|PGC_allocated)) || + unlikely(_nd != _d) ) + { + MEM_LOG("Bad page values %p: ed=%p(%u), sd=%p," + " caf=%08x, taf=%08x\n", page_to_pfn(page), + d, d->id, unpickle_domptr(_nd), x, + page->u.inuse.type_info); + okay = 0; + goto reassign_fail; + } + __asm__ __volatile__( + LOCK_PREFIX "cmpxchg8b %3" + : "=d" (_nd), "=a" (y), "=c" (e), + "=m" (*(volatile u64 *)(&page->count_info)) + : "0" (_d), "1" (x), "c" (e), "b" (x) ); + } + while ( unlikely(_nd != _d) || unlikely(y != x) ); + + /* + * Unlink from 'd'. We transferred at least one reference to 'e', + * so noone else is spinning to try to delete this page from 'd'. + */ + d->tot_pages--; + list_del(&page->list); + + /* + * Add the page to 'e'. Someone may already have removed the last + * reference and want to remove the page from 'e'. However, we have + * the lock so they'll spin waiting for us. 
+ */ + if ( unlikely(e->tot_pages++ == 0) ) + get_knownalive_domain(e); + list_add_tail(&page->list, &e->page_list); + + reassign_fail: + spin_unlock(&d->page_alloc_lock); + spin_unlock(&e->page_alloc_lock); + break; + + default: + MEM_LOG("Invalid extended pt command 0x%p", op.cmd); + okay = 0; + break; + } - case MMUEXT_CLEAR_FOREIGNDOM: - if ( (e = percpu_info[cpu].foreign) != NULL ) - put_domain(e); - percpu_info[cpu].foreign = NULL; - break; + if ( unlikely(!okay) ) + { + rc = -EINVAL; + break; + } - default: - MEM_LOG("Invalid extended pt command 0x%p", val & MMUEXT_CMD_MASK); - okay = 0; - break; + uops++; } - return okay; + out: + process_deferred_ops(cpu); + + /* Add incremental work we have done to the @done output parameter. */ + if ( unlikely(pdone != NULL) ) + __put_user(done + i, pdone); + + UNLOCK_BIGLOCK(d); + return rc; } int do_mmu_update( - mmu_update_t *ureqs, unsigned int count, unsigned int *pdone) + mmu_update_t *ureqs, + unsigned int count, + unsigned int *pdone, + unsigned int foreigndom) { -/* - * We steal the m.s.b. of the @count parameter to indicate whether this - * invocation of do_mmu_update() is resuming a previously preempted call. - * We steal the next 15 bits to remember the current FOREIGNDOM. - */ -#define MMU_UPDATE_PREEMPTED (~(~0U>>1)) -#define MMU_UPDATE_PREEMPT_FDOM_SHIFT ((sizeof(int)*8)-16) -#define MMU_UPDATE_PREEMPT_FDOM_MASK (0x7FFFU<domain; u32 type_info; - domid_t domid; LOCK_BIGLOCK(d); @@ -1666,31 +1677,17 @@ int do_mmu_update( if ( unlikely(shadow_mode_translate(d)) ) domain_crash_synchronous(); - /* - * If we are resuming after preemption, read how much work we have already - * done. This allows us to set the @done output parameter correctly. - * We also reset FOREIGNDOM here. - */ - if ( unlikely(count&(MMU_UPDATE_PREEMPTED|MMU_UPDATE_PREEMPT_FDOM_MASK)) ) + if ( unlikely(count & MMU_UPDATE_PREEMPTED) ) { - if ( !(count & MMU_UPDATE_PREEMPTED) ) - { - /* Count overflow into private FOREIGNDOM field. 
*/ - MEM_LOG("do_mmu_update count is too large"); - rc = -EINVAL; - goto out; - } count &= ~MMU_UPDATE_PREEMPTED; - domid = count >> MMU_UPDATE_PREEMPT_FDOM_SHIFT; - count &= ~MMU_UPDATE_PREEMPT_FDOM_MASK; if ( unlikely(pdone != NULL) ) (void)get_user(done, pdone); - if ( (domid != current->domain->id) && - !do_extended_command(0, MMUEXT_SET_FOREIGNDOM | (domid << 16)) ) - { - rc = -EINVAL; - goto out; - } + } + + if ( !set_foreigndom(cpu, foreigndom) ) + { + rc = -EINVAL; + goto out; } perfc_incrc(calls_to_mmu_update); @@ -1707,11 +1704,9 @@ int do_mmu_update( { if ( hypercall_preempt_check() ) { - rc = hypercall3_create_continuation( + rc = hypercall4_create_continuation( __HYPERVISOR_mmu_update, ureqs, - (count - i) | - (FOREIGNDOM->id << MMU_UPDATE_PREEMPT_FDOM_SHIFT) | - MMU_UPDATE_PREEMPTED, pdone); + (count - i) | MMU_UPDATE_PREEMPTED, pdone, foreigndom); break; } @@ -1863,15 +1858,6 @@ int do_mmu_update( put_page(&frame_table[pfn]); break; - /* - * MMU_EXTENDED_COMMAND: Extended command is specified - * in the least-siginificant bits of the 'value' field. - */ - case MMU_EXTENDED_COMMAND: - req.ptr &= ~(sizeof(l1_pgentry_t) - 1); - okay = do_extended_command(req.ptr, req.val); - break; - default: MEM_LOG("Invalid page update command %p", req.ptr); break; @@ -1893,20 +1879,7 @@ int do_mmu_update( if ( unlikely(prev_spl1e != 0) ) unmap_domain_mem((void *)prev_spl1e); - deferred_ops = percpu_info[cpu].deferred_ops; - percpu_info[cpu].deferred_ops = 0; - - if ( deferred_ops & DOP_FLUSH_TLB ) - local_flush_tlb(); - - if ( deferred_ops & DOP_RELOAD_LDT ) - (void)map_ldt_shadow_page(0); - - if ( unlikely(percpu_info[cpu].foreign != NULL) ) - { - put_domain(percpu_info[cpu].foreign); - percpu_info[cpu].foreign = NULL; - } + process_deferred_ops(cpu); /* Add incremental work we have done to the @done output parameter. 
*/ if ( unlikely(pdone != NULL) ) @@ -2016,11 +1989,10 @@ int do_update_va_mapping(unsigned long va, unsigned long val, unsigned long flags) { - struct exec_domain *ed = current; - struct domain *d = ed->domain; - unsigned int cpu = ed->processor; - unsigned long deferred_ops; - int rc = 0; + struct exec_domain *ed = current; + struct domain *d = ed->domain; + unsigned int cpu = ed->processor; + int rc = 0; perfc_incrc(calls_to_update_va); @@ -2046,17 +2018,25 @@ int do_update_va_mapping(unsigned long va, if ( unlikely(shadow_mode_enabled(d)) ) update_shadow_va_mapping(va, val, ed, d); - deferred_ops = percpu_info[cpu].deferred_ops; - percpu_info[cpu].deferred_ops = 0; - - if ( unlikely(deferred_ops & DOP_FLUSH_TLB) || - unlikely(flags & UVMF_FLUSH_TLB) ) + switch ( flags & UVMF_FLUSH_MASK ) + { + case UVMF_TLB_FLUSH_LOCAL: local_flush_tlb(); - else if ( unlikely(flags & UVMF_INVLPG) ) + percpu_info[cpu].deferred_ops &= ~DOP_FLUSH_TLB; + break; + case UVMF_TLB_FLUSH_ALL: + flush_tlb_mask(d->cpuset); + percpu_info[cpu].deferred_ops &= ~DOP_FLUSH_TLB; + break; + case UVMF_INVLPG_LOCAL: __flush_tlb_one(va); + break; + case UVMF_INVLPG_ALL: + flush_tlb_mask(d->cpuset); /* XXX KAF XXX */ + break; + } - if ( unlikely(deferred_ops & DOP_RELOAD_LDT) ) - (void)map_ldt_shadow_page(0); + process_deferred_ops(cpu); UNLOCK_BIGLOCK(d); @@ -2084,9 +2064,6 @@ int do_update_va_mapping_otherdomain(unsigned long va, rc = do_update_va_mapping(va, val, flags); - put_domain(d); - percpu_info[cpu].foreign = NULL; - return rc; } @@ -3176,6 +3153,97 @@ void audit_domains_key(unsigned char key) #endif /* NDEBUG */ +/* Graveyard: stuff below may be useful in future. 
*/ +#if 0 + case MMUEXT_TRANSFER_PAGE: + domid = (domid_t)(val >> 16); + gntref = (grant_ref_t)((val & 0xFF00) | ((ptr >> 2) & 0x00FF)); + + if ( unlikely(IS_XEN_HEAP_FRAME(page)) || + unlikely(!pfn_is_ram(pfn)) || + unlikely((e = find_domain_by_id(domid)) == NULL) ) + { + MEM_LOG("Bad frame (%p) or bad domid (%d).\n", pfn, domid); + okay = 0; + break; + } + + spin_lock(&d->page_alloc_lock); + + /* + * The tricky bit: atomically release ownership while there is just one + * benign reference to the page (PGC_allocated). If that reference + * disappears then the deallocation routine will safely spin. + */ + _d = pickle_domptr(d); + _nd = page->u.inuse._domain; + y = page->count_info; + do { + x = y; + if ( unlikely((x & (PGC_count_mask|PGC_allocated)) != + (1|PGC_allocated)) || + unlikely(_nd != _d) ) + { + MEM_LOG("Bad page values %p: ed=%p(%u), sd=%p," + " caf=%08x, taf=%08x\n", page_to_pfn(page), + d, d->id, unpickle_domptr(_nd), x, + page->u.inuse.type_info); + spin_unlock(&d->page_alloc_lock); + put_domain(e); + return 0; + } + __asm__ __volatile__( + LOCK_PREFIX "cmpxchg8b %2" + : "=d" (_nd), "=a" (y), + "=m" (*(volatile u64 *)(&page->count_info)) + : "0" (_d), "1" (x), "c" (NULL), "b" (x) ); + } + while ( unlikely(_nd != _d) || unlikely(y != x) ); + + /* + * Unlink from 'd'. At least one reference remains (now anonymous), so + * noone else is spinning to try to delete this page from 'd'. + */ + d->tot_pages--; + list_del(&page->list); + + spin_unlock(&d->page_alloc_lock); + + spin_lock(&e->page_alloc_lock); + + /* + * Check that 'e' will accept the page and has reservation headroom. + * Also, a domain mustn't have PGC_allocated pages when it is dying. 
+ */ + ASSERT(e->tot_pages <= e->max_pages); + if ( unlikely(test_bit(DF_DYING, &e->d_flags)) || + unlikely(e->tot_pages == e->max_pages) || + unlikely(!gnttab_prepare_for_transfer(e, d, gntref)) ) + { + MEM_LOG("Transferee has no reservation headroom (%d,%d), or " + "provided a bad grant ref, or is dying (%p).\n", + e->tot_pages, e->max_pages, e->d_flags); + spin_unlock(&e->page_alloc_lock); + put_domain(e); + okay = 0; + break; + } + + /* Okay, add the page to 'e'. */ + if ( unlikely(e->tot_pages++ == 0) ) + get_knownalive_domain(e); + list_add_tail(&page->list, &e->page_list); + page_set_owner(page, e); + + spin_unlock(&e->page_alloc_lock); + + /* Transfer is all done: tell the guest about its new page frame. */ + gnttab_notify_transfer(e, gntref, pfn); + + put_domain(e); + break; +#endif + /* * Local variables: * mode: C diff --git a/xen/arch/x86/x86_32/entry.S b/xen/arch/x86/x86_32/entry.S index c7192d9f6a..cb3eabb104 100644 --- a/xen/arch/x86/x86_32/entry.S +++ b/xen/arch/x86/x86_32/entry.S @@ -742,6 +742,8 @@ ENTRY(hypercall_table) .long SYMBOL_NAME(do_update_va_mapping_otherdomain) .long SYMBOL_NAME(do_switch_vm86) .long SYMBOL_NAME(do_boot_vcpu) + .long SYMBOL_NAME(do_ni_hypercall) /* 25 */ + .long SYMBOL_NAME(do_mmuext_op) .rept NR_hypercalls-((.-hypercall_table)/4) .long SYMBOL_NAME(do_ni_hypercall) .endr diff --git a/xen/arch/x86/x86_64/entry.S b/xen/arch/x86/x86_64/entry.S index e3a522e76a..be6572622d 100644 --- a/xen/arch/x86/x86_64/entry.S +++ b/xen/arch/x86/x86_64/entry.S @@ -449,6 +449,7 @@ ENTRY(hypercall_table) .quad SYMBOL_NAME(do_switch_to_user) .quad SYMBOL_NAME(do_boot_vcpu) .quad SYMBOL_NAME(do_set_segment_base) /* 25 */ + .quad SYMBOL_NAME(do_mmuext_op) .rept NR_hypercalls-((.-hypercall_table)/4) .quad SYMBOL_NAME(do_ni_hypercall) .endr diff --git a/xen/include/public/xen.h b/xen/include/public/xen.h index 145dc82a32..0ec17675c2 100644 --- a/xen/include/public/xen.h +++ b/xen/include/public/xen.h @@ -58,6 +58,7 @@ #define 
__HYPERVISOR_switch_to_user 23 /* x86/64 only */ #define __HYPERVISOR_boot_vcpu 24 #define __HYPERVISOR_set_segment_base 25 /* x86/64 only */ +#define __HYPERVISOR_mmuext_op 26 /* * MULTICALLS @@ -86,14 +87,10 @@ * MMU-UPDATE REQUESTS * * HYPERVISOR_mmu_update() accepts a list of (ptr, val) pairs. + * A foreigndom (FD) can be specified (or DOMID_SELF for none). + * Where the FD has some effect, it is described below. * ptr[1:0] specifies the appropriate MMU_* command. * - * FOREIGN DOMAIN (FD) - * ------------------- - * Some commands recognise an explicitly-declared foreign domain, - * in which case they will operate with respect to the foreigner rather than - * the calling domain. Where the FD has some effect, it is described below. - * * ptr[1:0] == MMU_NORMAL_PT_UPDATE: * Updates an entry in a page table. If updating an L1 table, and the new * table entry is valid/present, the mapped frame must belong to the FD, if @@ -109,61 +106,58 @@ * ptr[:2] -- Machine address within the frame whose mapping to modify. * The frame must belong to the FD, if one is specified. * val -- Value to write into the mapping entry. - * - * ptr[1:0] == MMU_EXTENDED_COMMAND: - * val[7:0] -- MMUEXT_* command. + */ +#define MMU_NORMAL_PT_UPDATE 0 /* checked '*ptr = val'. ptr is MA. */ +#define MMU_MACHPHYS_UPDATE 1 /* ptr = MA of frame to modify entry for */ + +/* + * MMU EXTENDED OPERATIONS * - * val[7:0] == MMUEXT_(UN)PIN_*_TABLE: - * ptr[:2] -- Machine address of frame to be (un)pinned as a p.t. page. - * The frame must belong to the FD, if one is specified. + * HYPERVISOR_mmuext_op() accepts a list of mmuext_op structures. + * A foreigndom (FD) can be specified (or DOMID_SELF for none). + * Where the FD has some effect, it is described below. * - * val[7:0] == MMUEXT_NEW_BASEPTR: - * ptr[:2] -- Machine address of new page-table base to install in MMU. + * cmd: MMUEXT_(UN)PIN_*_TABLE + * mfn: Machine frame number to be (un)pinned as a p.t. page. 
+ * The frame must belong to the FD, if one is specified. * - * val[7:0] == MMUEXT_NEW_USER_BASEPTR: [x86/64 only] - * ptr[:2] -- Machine address of new page-table base to install in MMU - * when in user space. + * cmd: MMUEXT_NEW_BASEPTR + * mfn: Machine frame number of new page-table base to install in MMU. * - * val[7:0] == MMUEXT_TLB_FLUSH_LOCAL: - * No additional arguments. Flushes local TLB. + * cmd: MMUEXT_NEW_USER_BASEPTR [x86/64 only] + * mfn: Machine frame number of new page-table base to install in MMU + * when in user space. * - * val[7:0] == MMUEXT_INVLPG_LOCAL: - * ptr[:2] -- Linear address to be flushed from the local TLB. + * cmd: MMUEXT_TLB_FLUSH_LOCAL + * No additional arguments. Flushes local TLB. * - * val[7:0] == MMUEXT_FLUSH_CACHE: - * No additional arguments. Writes back and flushes cache contents. + * cmd: MMUEXT_INVLPG_LOCAL + * linear_addr: Linear address to be flushed from the local TLB. * - * val[7:0] == MMUEXT_SET_LDT: - * ptr[:2] -- Linear address of LDT base (NB. must be page-aligned). - * val[:8] -- Number of entries in LDT. + * cmd: MMUEXT_TLB_FLUSH_MULTI + * cpuset: Set of VCPUs to be flushed. * - * val[7:0] == MMUEXT_TRANSFER_PAGE: - * val[31:16] -- Domain to whom page is to be transferred. - * (val[15:8],ptr[9:2]) -- 16-bit reference into transferee's grant table. - * ptr[:12] -- Page frame to be reassigned to the FD. - * (NB. The frame must currently belong to the calling domain). + * cmd: MMUEXT_INVLPG_MULTI + * linear_addr: Linear address to be flushed. + * cpuset: Set of VCPUs to be flushed. * - * val[7:0] == MMUEXT_SET_FOREIGNDOM: - * val[31:16] -- Domain to set as the Foreign Domain (FD). - * (NB. DOMID_SELF is not recognised) - * If FD != DOMID_IO then the caller must be privileged. + * cmd: MMUEXT_TLB_FLUSH_ALL + * No additional arguments. Flushes all VCPUs' TLBs. * - * val[7:0] == MMUEXT_CLEAR_FOREIGNDOM: - * Clears the FD. + * cmd: MMUEXT_INVLPG_ALL + * linear_addr: Linear address to be flushed from all VCPUs' TLBs. 
* - * val[7:0] == MMUEXT_REASSIGN_PAGE: - * ptr[:2] -- A machine address within the page to be reassigned to the FD. - * (NB. page must currently belong to the calling domain). + * cmd: MMUEXT_FLUSH_CACHE + * No additional arguments. Writes back and flushes cache contents. * - * val[7:0] == MMUEXT_TLB_FLUSH_MULTI: - * Flush TLBs of VCPUs specified in @mask. + * cmd: MMUEXT_SET_LDT + * linear_addr: Linear address of LDT base (NB. must be page-aligned). + * nr_ents: Number of entries in LDT. * - * val[7:0] == MMUEXT_INVLPG_MULTI: - * ptr[:2] -- Linear address to be flushed from TLB of VCPUs in @mask. + * cmd: MMUEXT_REASSIGN_PAGE + * mfn: Machine frame number to be reassigned to the FD. + * (NB. page must currently belong to the calling domain). */ -#define MMU_NORMAL_PT_UPDATE 0 /* checked '*ptr = val'. ptr is MA. */ -#define MMU_MACHPHYS_UPDATE 2 /* ptr = MA of frame to modify entry for */ -#define MMU_EXTENDED_COMMAND 3 /* least 8 bits of val demux further */ #define MMUEXT_PIN_L1_TABLE 0 /* ptr = MA of frame to pin */ #define MMUEXT_PIN_L2_TABLE 1 /* ptr = MA of frame to pin */ #define MMUEXT_PIN_L3_TABLE 2 /* ptr = MA of frame to pin */ @@ -172,25 +166,39 @@ #define MMUEXT_NEW_BASEPTR 5 /* ptr = MA of new pagetable base */ #define MMUEXT_TLB_FLUSH_LOCAL 6 /* ptr = NULL */ #define MMUEXT_INVLPG_LOCAL 7 /* ptr = VA to invalidate */ -#define MMUEXT_FLUSH_CACHE 8 -#define MMUEXT_SET_LDT 9 /* ptr = VA of table; val = # entries */ -#define MMUEXT_SET_FOREIGNDOM 10 /* val[31:16] = dom */ -#define MMUEXT_CLEAR_FOREIGNDOM 11 -#define MMUEXT_TRANSFER_PAGE 12 /* ptr = MA of frame; val[31:16] = dom */ -#define MMUEXT_REASSIGN_PAGE 13 -#define MMUEXT_NEW_USER_BASEPTR 14 -#define MMUEXT_TLB_FLUSH_MULTI 15 /* ptr = NULL; mask = VCPUs to flush */ -#define MMUEXT_INVLPG_MULTI 16 /* ptr = VA to inval.; mask = VCPUs */ -#define MMUEXT_CMD_MASK 255 -#define MMUEXT_CMD_SHIFT 8 +#define MMUEXT_TLB_FLUSH_MULTI 8 /* ptr = NULL; mask = VCPUs to flush */ +#define MMUEXT_INVLPG_MULTI 9 /* 
ptr = VA to inval.; mask = VCPUs */ +#define MMUEXT_TLB_FLUSH_ALL 10 +#define MMUEXT_INVLPG_ALL 11 +#define MMUEXT_FLUSH_CACHE 12 +#define MMUEXT_SET_LDT 13 /* ptr = VA of table; val = # entries */ +#define MMUEXT_REASSIGN_PAGE 14 +#define MMUEXT_NEW_USER_BASEPTR 15 -/* These are passed as 'flags' to update_va_mapping. They can be ORed. */ -#define UVMF_FLUSH_TLB 1 /* Flush entire TLB. */ -#define UVMF_INVLPG 2 /* Flush the VA mapping being updated. */ +#ifndef __ASSEMBLY__ +struct mmuext_op { + unsigned int cmd; + union { + /* [UN]PIN_TABLE, NEW_BASEPTR, NEW_USER_BASEPTR, REASSIGN_PAGE */ + memory_t mfn; + /* INVLPG_LOCAL, INVLPG_MULTI, INVLPG_ALL, SET_LDT */ + memory_t linear_addr; + }; + union { + /* SET_LDT */ + unsigned int nr_ents; + /* TLB_FLUSH_MULTI, INVLPG_MULTI */ + unsigned long cpuset; + }; +}; +#endif -/* Backwards source compatibility. */ -#define MMUEXT_TLB_FLUSH MMUEXT_TLB_FLUSH_LOCAL -#define MMUEXT_INVLPG MMUEXT_INVLPG_LOCAL +/* These are passed as 'flags' to update_va_mapping. At most one may be given: they are distinct values under UVMF_FLUSH_MASK, not ORable bits. */ +#define UVMF_TLB_FLUSH_LOCAL 1 /* Flush local CPU's TLB. */ +#define UVMF_INVLPG_LOCAL 2 /* Flush VA from local CPU's TLB. */ +#define UVMF_TLB_FLUSH_ALL 3 /* Flush all TLBs. */ +#define UVMF_INVLPG_ALL 4 /* Flush VA from all TLBs. */ +#define UVMF_FLUSH_MASK 7 /* * Commands to HYPERVISOR_sched_op(). @@ -270,7 +278,6 @@ typedef struct { memory_t ptr; /* Machine address of PTE. */ memory_t val; /* New contents of PTE. */ - /*unsigned long mask;*/ /* VCPU mask (certain extended commands). */ } PACKED mmu_update_t; /* -- 2.30.2